#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# Installing required packages
# openai is pinned below 1.0 because this notebook uses the legacy module-level
# API (openai.api_type / openai.ChatCompletion), which was removed in openai 1.0.
get_ipython().system('pip install "openai<1"')
get_ipython().system('pip install nltk')
get_ipython().system('pip install pandas')
# DataFrame.to_markdown() relies on the optional 'tabulate' package
get_ipython().system('pip install tabulate')


# In[1]:


# Importing libraries
import os
import openai
import io
import pandas as pd
# Download NLTK resources
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
# Fetch the VADER lexicon used by SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
# Using Azure OpenAI. Settings are taken from environment variables when they
# are set, so real credentials never need to be saved inside the notebook;
# otherwise replace the placeholder strings below.
openai.api_type = "azure"
openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT", "INSERT URI HERE")
openai.api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "INSERT API VERSION HERE")
openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY", "INSERT KEY HERE")


# In[3]:


# Example with a small dataset, written one record per row
column_names = ['Full Name', 'Address', 'Age', 'reviews']
sample_rows = [
    (
        'Alice Johnson', '123 Main St', 25,
        "The food at this restaurant was absolutely amazing! Each dish was a delightful experience, and the service was top-notch.",
    ),
    (
        'Bob Smith', '456 Oak Ave', 30,
        "Unfortunately, my experience at this restaurant was less than satisfactory. The service was slow, and the food arrived cold. I wouldn't recommend it.",
    ),
    (
        'Charlie Brown', '789 Pine Ln', 22,
        "I enjoyed the atmosphere of the restaurant, but the portions were disappointingly small. I left feeling hungry despite the tasty dishes.",
    ),
    (
        'David Miller', '101 Elm Blvd', 35,
        "The service was excellent, and the dishes were delicious. I will definitely come back to this restaurant in the future.",
    ),
    (
        'Eva White', '202 Cedar Dr', 28,
        "My visit to this restaurant was terrible. The staff was rude, and the food had no flavor. I won't be returning.",
    ),
]

# Transpose the records into a column-name -> list-of-values mapping
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*sample_rows))
}

original_data = pd.DataFrame(data)


# In[4]:


# Display the original dataset
original_data


# In[5]:


# System instruction: the model must answer with a markdown table only
prompt = "You are an AI Assistant that generates 5 synthetic data points upon user request. Only return the markdown table. Do not return additional text. Create new reviews"

# User request: the existing rows are embedded in the message as a markdown table
source_table = original_data.to_markdown(index=False)
query = f"Generate 5 new synthetic data with similar distribution, pattern and sentiment to the following data. Write new reviews:\n\n{source_table}"

chat_messages = [
    {"role": "system", "content": prompt},
    {"role": "user", "content": query},
]

# Send the chat completion request (the engine name is a placeholder to fill in)
response = openai.ChatCompletion.create(
    engine="INSERT MODEL NAME HERE",
    messages=chat_messages,
    temperature=0.7,
    stop=None,
)


# In[6]:


# Lazily-created VADER analyzer shared by every call to get_sentiment_score
_sentiment_analyzer = None


# Function to get sentiment score
def get_sentiment_score(text):
    """Return the VADER compound sentiment score for ``text``.

    Args:
        text: The text to score.

    Returns:
        The ``'compound'`` entry of the dictionary returned by
        ``SentimentIntensityAnalyzer.polarity_scores(text)``.
    """
    global _sentiment_analyzer
    if _sentiment_analyzer is None:
        # Build the analyzer only once: this function is applied row by row,
        # and constructing a new analyzer per row reloads the lexicon each time.
        _sentiment_analyzer = SentimentIntensityAnalyzer()
    return _sentiment_analyzer.polarity_scores(text)['compound']

# Score every review and store the result in a new 'sentiment_score' column
review_scores = original_data['reviews'].map(get_sentiment_score)
original_data['sentiment_score'] = review_scores

# Show the updated DataFrame
original_data


# In[7]:


# Pull the reply text out of the first choice in the response
first_choice = response['choices'][0]
generated_text = first_choice['message']['content']


# In[8]:


# Inspect the raw text returned by the model
generated_text


# In[9]:


# Keep only the lines that look like markdown table rows. Model output may
# include code fences, blank lines or stray sentences around the table; without
# this filter each such line would become an empty "||" row and corrupt the
# header (when it comes first) or add junk rows.
table_rows = [
    line.strip()
    for line in generated_text.strip().split('\n')
    if line.strip().startswith('|')
]
if not table_rows:
    raise ValueError("No markdown table rows found in the model response")

# Normalise every row to "|cell|cell|...|" with whitespace trimmed around each
# cell; [1:-1] discards the empty strings outside the outer pipes.
cleaned_string = '\n'.join(
    '|' + '|'.join(cell.strip() for cell in row.split('|')[1:-1]) + '|'
    for row in table_rows
)

# Use pandas to read the formatted string into a DataFrame. The markdown header
# separator row (|---|---|) is deliberately kept here: it is parsed as the first
# data row and removed by index in the following cleanup step.
df = pd.read_csv(io.StringIO(cleaned_string), sep="|", index_col=False)


# In[10]:


# Remove columns that contain no values at all (the leading/trailing pipes of
# each table row produce empty edge columns when split on "|").
df = df.dropna(axis=1, how='all')
# Drop the row labelled 0 -- presumably the markdown header separator
# (|---|---|), which is read as the first data row.
df = df.drop(0, axis=0)
# Convert Age to numbers; values that cannot be parsed become NaN.
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')


# In[11]:


# Display the parsed synthetic dataset
df


# In[14]:


# Score the generated reviews with the same sentiment function
generated_scores = df['reviews'].map(get_sentiment_score)
df['sentiment_score'] = generated_scores
df

